from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

# Toy corpus: a handful of raw English sentences used as training text.
sentences = [
    "I love natural language processing.",
    "Word2Vec is a powerful tool for NLP.",
    "Deep learning models are fascinating.",
    "I enjoy learning about AI and machine learning.",
]

# Tokenize every sentence; Word2Vec expects an iterable of token lists,
# not raw strings.
processed_sentences = list(map(simple_preprocess, sentences))

# Train the Word2Vec model on the tokenized corpus.
model = Word2Vec(
    sentences=processed_sentences,
    # Architecture: sg=1 selects Skip-Gram, sg=0 selects CBOW.
    sg=1,
    # Dimensionality of the learned word vectors.
    vector_size=100,
    # Size of the context window around the target word.
    window=5,
    # Words occurring fewer than min_count times are ignored; 1 keeps
    # every word of this tiny corpus.
    min_count=1,
    # Number of worker threads used for training.
    workers=4,
)

# Persist the trained model to disk.
model.save("word2vec.model")

# The saved model can be restored later with:
#     model = Word2Vec.load("word2vec.model")